Authors:
Tahmidul Azom Sany (tsany@gmu.edu)
NumPy, pandas, and Matplotlib are popular Python libraries used in climate science for data analysis, manipulation, and visualization. NumPy provides efficient arrays for handling large datasets and mathematical operations. pandas offers data structures for tabular data manipulation, while Matplotlib allows for creating various types of visualizations. Together, these libraries provide a comprehensive toolkit for climate scientists to handle data, perform analysis, and create visualizations for interpreting and communicating climate-related findings. In this tutorial, we will explore some of the features of these three packages in Python.
Prerequisite: It is recommended for users to have a basic understanding of the Python programming language before proceeding with this tutorial.
# Importing NumPy library
import numpy as np
# Creating a 1-D NumPy array from a Python list
arr = np.array([1, 2, 3, 4, 5])
arr # in a notebook/REPL a bare name displays the value; in a script use print(arr)

# Accessing single elements: indices start at 0, negative indices count from the end
print("Element at index 0:", arr[0])
print("Element at index 2:", arr[2])
print("Element at last index :", arr[-1])

# Slicing with arr[start:stop]: start is included, stop is excluded,
# and leaving either one out means "from the beginning" / "to the end"
print("All Element in that array:", arr[:])
print("First three Element :", arr[:3])
# A negative start counts from the end, so arr[-3:] is the last three items
# for an array of ANY length (arr[2:] only matched because len(arr) == 5)
print("Last three Element :", arr[-3:])

# Basic reductions and element-wise operations on the array
print("Sum of elements in the array:", np.sum(arr))
print("Product of elements in the array:", np.prod(arr))
print("Mean of elements in the array:", np.mean(arr))
print("Minimum value in the array:", np.min(arr))
print("Maximum value in the array:", np.max(arr))
# np.sqrt works element by element and returns a new array of the same length
print("Square root of elements in the array:", np.sqrt(arr))
print("Standard deviation of elements in the array:", np.std(arr))
# np.sort returns a sorted copy; arr itself is left unchanged
print("Array sorted in ascending order:", np.sort(arr))
# Building a 2-D array (3 rows x 3 columns) from nested lists
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("Shape of the array: ", matrix.shape)
matrix

# Picking out single elements with one [row, column] index pair
print("Element at row 0 and column 1:", matrix[0, 1])
print("Element at row 2 and column 2:", matrix[2, 2])

# Reductions along one axis (note the axis value):
# axis=1 runs across the columns and yields one result per row,
# axis=0 runs down the rows and yields one result per column
print("Sum of elements in each row:", matrix.sum(axis=1))
print("Sum of elements in each column:", matrix.sum(axis=0))

# Without an axis argument the reduction covers every element of the matrix
print("Sum of elements in the matrix:", matrix.sum())
print("Product of elements in the matrix:", matrix.prod())
print("Mean of elements in the matrix:", matrix.mean())
print("Minimum value in the matrix:", matrix.min())
print("Maximum value in the matrix:", matrix.max())

# "\n" starts a new line so the 2-D output begins below its label
print("Transpose of the matrix:\n", matrix.T)
# np.sort returns a sorted copy; axis=1 orders the values inside each row,
# axis=0 orders the values inside each column
print("Matrix sorted in ascending order along rows:\n", np.sort(matrix, axis=1))
print("Matrix sorted in ascending order along columns:\n", np.sort(matrix, axis=0))
# Reshaping an array (one of the most frequently used features)
array = np.arange(1, 11)  # the integers 1 through 10; the stop value 11 is excluded
print("inital array: ", array)
# Lay the same 10 values out as 2 rows of 5 columns
array_reshaped = array.reshape(2, 5)
print("final array after reshaping: \n ", array_reshaped)
# Element-wise arithmetic between two arrays of the same length
arr2 = np.array([1, 3, 5, 7, 9])
arr3 = np.array([2, 4, 6, 8, 10])
# The arithmetic operators work element by element on NumPy arrays
print("Addition of arrays:", arr2 + arr3)
print("Subtraction of arrays:", arr2 - arr3)
print("Multiplication of arrays:", arr2 * arr3)
print("Division of arrays:", arr2 / arr3)

# NumPy's mathematical functions are also applied to every element
print("Square root of elements:", np.sqrt(arr2))
print("Exponential of elements:", np.exp(arr2))
print("Logarithm of elements:", np.log(arr2))  # natural logarithm (base e)
# Importing the Pandas library
import pandas as pd
# Building a sample DataFrame from three equal-length lists
name = ['Alice', 'Bob', 'Charlie', 'Dave', 'Eve']
age = [25, 30, 35, 40, 45]
city = ['New York', 'Los Angeles', 'Chicago', 'San Francisco', 'Seattle']
# Each dictionary key becomes a column label (you can name the columns anything)
df = pd.DataFrame({'Name': name, 'Age': age, 'City': city})

# Displaying the DataFrame
print("Original DataFrame:")
df

# Selecting columns
print("\nAccessing Columns:")
print(df['Name'])  # one label -> a single column (a Series)
print(df[['Name', 'Age']])  # a list of labels -> several columns (a DataFrame)

# Selecting rows (try running these one by one for better understanding)
print("\nAccessing Rows:")
print(df.loc[2])  # .loc looks a row up by its index label
print(df.iloc[2])  # .iloc looks a row up by its integer position
print(df.iloc[1:4])  # positional slice: rows 1, 2 and 3 (the stop value is excluded)

# Data manipulation
print("\nData Manipulation:")
df['City'] = df['City'].str.upper()  # overwrite a column with upper-cased text
df['Country'] = 'USA'  # a single value is repeated for every row of the new column
df = df.drop(columns='Age')  # remove a column
df = df.rename(columns={'Name': 'Full Name'})  # rename a column
# Sort by a column, then renumber the rows 0..n-1 and discard the old index
df = df.sort_values(by='Full Name').reset_index(drop=True)
df['City'] = df['City'].str.replace(' ', '-', regex=False)  # literal text replacement in every value
df

# Data analysis
print("\nData Analysis:")
# 'Age' was dropped above, so only text columns remain; for such columns
# describe() reports count / unique / top / freq rather than numeric statistics
print(df.describe())
print("----------------------------------------")
print(df['Full Name'].value_counts())  # how many times each value occurs in the column
import matplotlib.pyplot as plt
# Prepare the data: five x positions and, for each one, a y value that is twice x
x = list(range(5))
y = [2 * value for value in x]
# Draw y against x as a line using the default style
plt.plot(x, y)
# Display the figure
plt.show()
# Two data series that share the same x values.
# NOTE(review): x is not sorted (5 comes before 4), so each line doubles back
# between its last two points - confirm this is intended.
x = [0, 1, 2, 5, 4]
y = [0, 2, 3, 2, 8]
z = [0, 3, 6, 9, 12]

# Figure size is given in inches; dpi is the number of dots (pixels) per inch
plt.figure(figsize=(8, 5), dpi=100)

# One plot call per series; each label is the text shown in the legend
plt.plot(x, y, 'r', label='X and Y')  # 'r' -> red line
plt.plot(x, z, color='b', linestyle='--', label='X and Z')  # blue dashed line

# Title and axis labels
plt.title("Simple Plot", fontsize=18)
plt.xlabel("X-Axis")
plt.ylabel("Y-Axis")

# Put the x-axis tick marks at these positions only
plt.xticks([0, 2, 4, 6])

# Add the legend built from the labels above, then display the figure
plt.legend()
plt.show()
# Scatter plot of 100 random points.
# x is drawn from a normal distribution with mean 5 and standard deviation 1,
# y from one with mean 10 and standard deviation 2; no seed is set, so the
# points differ on every run.
x = np.random.normal(loc=5.0, scale=1.0, size=100)
y = np.random.normal(loc=10.0, scale=2.0, size=100)
plt.scatter(x, y)
plt.show()